# Libraries
# Tree handling (RRphylo, ape, phytools), plotting (ggtree, ggnewscale,
# patchwork, RColorBrewer) and data wrangling (tidyverse).
library(RRphylo)
library(manipulate)
library(ape)
library(phytools)
library(ggtree)
library(tidyverse)
library(RColorBrewer)
library(ggnewscale)
library(patchwork)

# Project-local helper script. Presumably it defines the palettes used further
# down (sublineage_colors, sublineage_shading, country_colors, source_colors,
# dataset_colors, mat_shapes, mat_colors); none of them is defined in this
# file -- TODO confirm.
source("scripts/metadata_colors.R")

# Metadata
# Input metadata table (comma-separated, one row per sample).
metadata_all_path <- "data/processed/metadata_all_H99_complete.csv"

# Trees
# Raw input trees and the locations the rooted / merged trees are written to.
desj_tree_path <- "data/raw/CryptoDiversity_Desjardins_Tree.tre"
desj_tree_out_path <- "data/processed/tree_desjardins.newick"
ashton_tree_path <-
  "data/raw/2017.06.09.all_ours_and_desj.snp_sites.mod.fa.cln.tree"
ashton_tree_out_path <- "data/processed/tree_ashton.newick"
merged_tree_out_path <- "data/processed/tree_merged.newick"

# Plots
# Output figures (PNG and, where available, PDF).
desj_tree_out_plot <- "results/trees/tree_desjardins.png"
desj_tree_out_plot_pdf <- "results/trees/tree_desjardins.pdf"
ashton_tree_out_plot <- "results/trees/tree_ashton.png"
ashton_tree_out_plot_pdf <- "results/trees/tree_ashton.pdf"
merged_tree_plot <- "results/trees/tree_merged.png"
merged_tree_plot_pdf <- "results/trees/tree_merged.pdf"
merged_tree_removed_plot_png <- "results/trees/tree_merged_removed.png"
merged_tree_small_plot <- "results/trees/tree_merged_small.png"

# Use the metadata table that has all the samples of Ashton and Desjardins and
# H99 (n = 1064), even the ones that will be excluded.
# Read the comma-separated metadata table and move `strain` to the first
# column.
metadata <- read.delim(
  metadata_all_path,
  header = TRUE,
  sep = ","
) %>%
  select(strain, everything())

# An empty mating-type string is recoded as NA. Assigning in place (instead of
# rebuilding the column with ifelse()) keeps the column's original type.
metadata$mating_type[metadata$mating_type %in% ""] <- NA

# Number of samples per dataset and lineage. `.groups = "drop"` returns an
# ungrouped table and silences the regrouping message of summarize().
summary <- metadata %>%
  group_by(dataset, lineage) %>%
  summarize(count = n(), .groups = "drop")
summary
# | dataset | lineage | count |
# |---|---|---|
# | Ashton | AD_hybrid | 5 |
# | Ashton | VNI | 678 |
# | Ashton | VNII | 4 |
# | Ashton | gattii | 12 |
# | Desjardins | VNBI | 122 |
# | Desjardins | VNBII | 64 |
# | Desjardins | VNI | 185 |
# | Desjardins | VNII | 16 |
# | Reference | VNI | 1 |

# Make separate dataframes for each metadata field.
# Fix the factor level order so that it follows the colour palettes;
# "VNIa-outlier" is appended as an extra sublineage level.
metadata$vni_subdivision <- factor(
  metadata$vni_subdivision,
  levels = c(names(sublineage_colors), "VNIa-outlier")
)
metadata$country_of_origin <- factor(
  metadata$country_of_origin,
  levels = names(country_colors)
)

# Return a one-column data frame holding `field`, with the strain names as row
# names.
strain_indexed <- function(data, field) {
  data %>%
    select(strain, all_of(field)) %>%
    column_to_rownames("strain")
}

# Sublineage is only kept for VNI samples; unused factor levels are dropped.
sublineage <- metadata %>%
  filter(lineage == "VNI") %>%
  strain_indexed("vni_subdivision") %>%
  droplevels()

lineage <- strain_indexed(metadata, "lineage")
dataset <- strain_indexed(metadata, "dataset")
source <- strain_indexed(metadata, "source")
country <- strain_indexed(metadata, "country_of_origin")
mating_type <- strain_indexed(metadata, "mating_type")
ploidy <- strain_indexed(metadata, "ploidy")
quality <- strain_indexed(metadata, "quality_warning")

# Import the raw Desjardins tree
desj_tree <- read.tree(desj_tree_path)

# Reroot the tree at the middle of the branch leading to VNII
VNII_root <- getMRCA(desj_tree, c("C2", "C12"))
# Length of the edge whose child node is the VNII MRCA; half of it is passed
# to reroot() as the position of the new root along that edge.
edge_length <- desj_tree$edge.length[desj_tree$edge[, 2] == VNII_root]
desj_tree <- reroot(desj_tree, VNII_root, edge_length / 2)

# Write rooted Desjardins tree
write.tree(desj_tree, file = desj_tree_out_path)

# Keep only the names of the countries in the Desjardins dataset to have a
# proper legend.
country_desj <- levels(droplevels(
  country$country_of_origin[rownames(country) %in% desj_tree$tip.label]
))

# Get lineage nodes for plot
# MRCA node of each lineage in the rooted Desjardins tree, identified by a
# pair of tips.
VNI_node <- getMRCA(desj_tree, c("Tu241-1", "Bt164"))
VNII_node <- getMRCA(desj_tree, c("C2", "C12"))
VNBI_node <- getMRCA(desj_tree, c("Tu229-1", "Ftc267-2"))
VNBII_node <- getMRCA(desj_tree, c("MW-RSA3321", "MW-RSA3179"))
nodes_lineages <- data.frame(
  lineage = c("VNI", "VNII", "VNBI", "VNBII"),
  mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)

# MRCA node of each VNI sublineage in the rooted Desjardins tree.
VNIa4_node <- getMRCA(desj_tree, c("Ug2463", "Bt164"))
VNIa5_node <- getMRCA(desj_tree, c("NRHc5025.ENR.CLIN.1", "AD1-83a"))
VNIa93_node <- getMRCA(desj_tree, c("RTC1", "Br795"))
VNIa32_node <- getMRCA(desj_tree, c("A4-34-6", "In2632"))
VNIaX_node <- getMRCA(desj_tree, c("Bt48", "MW-RSA36"))
VNIaY_node <- getMRCA(desj_tree, c("Bt18", "Bt138"))
VNIb_node <- getMRCA(desj_tree, c("A1-84-14", "MW-RSA722"))
VNIc_node <- getMRCA(desj_tree, c("Bt20", "Bt11"))
nodes_vnisublineages <- data.frame(
  sublineage = c(
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# Circular tree with the metadata attached: sublineage clades are shaded,
# lineage names are written at their MRCA nodes, and tip points show mating
# type (shape) and source (colour).
d <- ggtree(
  desj_tree,
  ladderize = TRUE,
  layout = "circular",
  size = 0.09
) %<+% metadata +
  geom_tiplab(color = "black", size = 0.6, align = TRUE, linesize = 0.05) +
  # match() yields NA for every node that is not a lineage MRCA, so only the
  # four lineage nodes get a label.
  geom_text(
    aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
    size = 2, fontface = "bold",
    hjust = 0.3, vjust = -0.5
  ) +
  geom_hilight(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, fill = sublineage),
    alpha = 0.8
  ) +
  scale_fill_manual(name = "Sublineage", values = sublineage_shading) +
  # "none" replaces the deprecated `FALSE` for hiding a legend.
  guides(fill = "none") +
  new_scale_fill() +
  # Draw the branches again, after the highlight layer.
  geom_tree(size = 0.1) +
  geom_tippoint(aes(shape = mating_type, color = source), size = 0.5) +
  scale_color_manual(
    name = "Source", values = source_colors, na.value = "gray90"
  ) +
  scale_shape_manual(name = "MAT", values = mat_shapes, na.value = 18) +
  guides(
    shape = guide_legend(order = 1),
    color = guide_legend(override.aes = list(size = 5), order = 2)
  ) +
  geom_treescale(x = -0.03, y = 0, width = 0.01, fontsize = 2, offset = 30)

# Add the country ring (legend limited to the countries present in this tree)
# and the sublineage labels.
dc <- gheatmap(d, country, width = .08, colnames = FALSE, offset = 0.07) +
  scale_fill_manual(
    values = country_colors,
    name = "Country", na.translate = TRUE,
    limits = country_desj
  ) +
  guides(fill = guide_legend(order = 3, ncol = 1)) +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    fontsize = 3,
    align = TRUE, face = "bold",
    angle = "auto", offset = 0.11
  ) +
  theme(
    legend.position = "right",
    legend.direction = "vertical"
  )
dc

# Save Desjardins tree plot to file.
ggsave(desj_tree_out_plot, dc, height = 6.5, width = 7, units = "in", dpi = 600)
ggsave(desj_tree_out_plot_pdf, dc, height = 6.5, width = 7, units = "in", dpi = 600)

# Import the raw Ashton tree
ashton_tree_unrooted <- read.tree(ashton_tree_path)

# Rename tips of the Desjardins samples because they have SRA run accessions
# in this tree but strain names in the other one.
# match() looks every tip up in `metadata$run` in one vectorised step: tips
# without a matching run keep their label, the others take the strain name of
# the first matching row. The result is always a plain, unnamed character
# vector, even if `run` contains NA or repeated values.
run_index <- match(ashton_tree_unrooted$tip.label, metadata$run)
ashton_tree_unrooted$tip.label <- ifelse(
  is.na(run_index),
  ashton_tree_unrooted$tip.label,
  metadata$strain[run_index]
)

# Get the samples that are present in the tree but absent from the metadata of
# the full dataset
setdiff(ashton_tree_unrooted$tip.label, metadata$strain)
# [1] "04CN-63-018"

# Root Ashton tree at the middle of the branch leading to VNIa
VNIa_root <- getMRCA(ashton_tree_unrooted, c("AD3-95a", "Tu259-1"))
# Length of the edge whose child node is the VNIa MRCA; half of it is passed
# to reroot() as the position of the new root along that edge.
edge_length <- ashton_tree_unrooted$edge.length[
  ashton_tree_unrooted$edge[, 2] == VNIa_root
]
ashton_tree <- reroot(ashton_tree_unrooted, VNIa_root, edge_length / 2)

# Write rooted Ashton tree
write.tree(ashton_tree, file = ashton_tree_out_path)

# MRCA node of each VNI sublineage in the rooted Ashton tree.
VNIa4_node <- getMRCA(ashton_tree, c("CNS_289", "04CN-30-008"))
VNIa5_node <- getMRCA(ashton_tree, c("14936_1#45", "20427_2#16"))
VNIa93_node <- getMRCA(ashton_tree, c("04CN-65-080", "04CN-65-002"))
VNIa32_node <- getMRCA(ashton_tree, c("20427_2#33", "20427_2#5"))
VNIaX_node <- getMRCA(ashton_tree, c("04CN-63-007", "20427_3#40"))
VNIaY_node <- getMRCA(ashton_tree, c("04CN-65-073", "04CN-64-081"))
VNIb_node <- getMRCA(ashton_tree, c("AD3-41a", "H99"))
VNIc_node <- getMRCA(ashton_tree, c("Bt20", "Bt11"))
nodes_vnisublineages <- data.frame(
  sublineage = c(
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# The same sublineage nodes looked up in the unrooted Ashton tree. The
# *_node variables are overwritten here; the rooted values are already stored
# in `nodes_vnisublineages`.
VNIa4_node <- getMRCA(ashton_tree_unrooted, c("CNS_289", "04CN-30-008"))
VNIa5_node <- getMRCA(ashton_tree_unrooted, c("14936_1#45", "20427_2#16"))
VNIa93_node <- getMRCA(ashton_tree_unrooted, c("04CN-65-080", "04CN-65-002"))
VNIa32_node <- getMRCA(ashton_tree_unrooted, c("20427_2#33", "20427_2#5"))
VNIaX_node <- getMRCA(ashton_tree_unrooted, c("04CN-63-007", "20427_3#40"))
VNIaY_node <- getMRCA(ashton_tree_unrooted, c("04CN-65-073", "04CN-64-081"))
VNIb_node <- getMRCA(ashton_tree_unrooted, c("AD3-41a", "H99"))
VNIc_node <- getMRCA(ashton_tree_unrooted, c("Bt20", "Bt11"))
nodes_vnisublineages_unrooted <- data.frame(
  sublineage = c(
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# Side-by-side comparison of the tree before and after rerooting, with the
# sublineage clades shaded and labelled.
pu <- ggtree(ashton_tree_unrooted, layout = "circular", size = 0.1) +
  geom_hilight(
    data = nodes_vnisublineages_unrooted,
    mapping = aes(node = mrca, fill = sublineage),
    alpha = 0.8
  ) +
  scale_fill_manual(name = "Sublineage", values = sublineage_shading) +
  geom_cladelab(
    data = nodes_vnisublineages_unrooted,
    mapping = aes(node = mrca, label = sublineage),
    fontsize = 3,
    align = TRUE, face = "bold",
    angle = "auto", offset = 0.01
  ) +
  labs(title = "Unrooted") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
p <- ggtree(ashton_tree, layout = "circular", size = 0.1) +
  geom_hilight(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, fill = sublineage),
    alpha = 0.8
  ) +
  scale_fill_manual(name = "Sublineage", values = sublineage_shading) +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    fontsize = 3,
    align = TRUE, face = "bold",
    angle = "auto", offset = 0.01
  ) +
  labs(title = "Rooted") +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )
# patchwork's `|` places the two plots next to each other.
pu | p

# Rooted Ashton tree with tip labels, a scale bar and shaded sublineages.
m <- ggtree(ashton_tree, layout = "circular", size = 0.1) +
  geom_tiplab(
    aes(label = label),
    size = 0.5, align = TRUE,
    linetype = "dashed", linesize = .03
  ) +
  geom_treescale(x = -0.03, y = 0, width = 0.01, fontsize = 2, offset = 30) +
  geom_hilight(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, fill = sublineage),
    alpha = 0.8
  ) +
  scale_fill_manual(name = "Sublineage", values = sublineage_shading) +
  # "none" replaces the deprecated `FALSE` for hiding a legend.
  guides(fill = "none") +
  new_scale_fill()

# Heatmap rings, from the inside out: dataset, source, country. Each ring gets
# its own fill scale via new_scale_fill().
m1 <- gheatmap(m, dataset, width = .05, colnames = FALSE, offset = .025) +
  scale_fill_manual(
    values = dataset_colors, name = "Dataset", na.translate = FALSE
  ) +
  guides(fill = guide_legend(order = 1)) +
  new_scale_fill()
m2 <- gheatmap(m1, source, width = .05, colnames = FALSE, offset = 0.035) +
  scale_fill_manual(
    values = source_colors, name = "Source", na.translate = FALSE
  ) +
  guides(fill = guide_legend(order = 3)) +
  labs(title = "Ashton tree") +
  new_scale_fill()
m3 <- gheatmap(m2, country, width = .05, colnames = FALSE, offset = 0.045) +
  scale_fill_manual(
    values = country_colors,
    name = "Country", na.translate = FALSE,
    limits = levels(country$country_of_origin)
  ) +
  guides(fill = guide_legend(order = 4)) +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    legend.title = element_text(size = 9),
    legend.text = element_text(size = 7),
    legend.key.size = unit(0.3, "cm"),
    plot.margin = margin(0, 0, 0, 0, "cm")
  ) +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    fontsize = 3,
    align = TRUE, face = "bold",
    angle = "auto", offset = 0.06
  )
m3

ggsave(ashton_tree_out_plot, m3, height = 6.5, width = 7, units = "in", dpi = 600)
ggsave(ashton_tree_out_plot_pdf, m3, height = 6.5, width = 7, units = "in", dpi = 600)

# Specify clades in Desjardins tree
# Each clade is identified by a pair of tips; its node is the MRCA of that
# pair in the rooted Desjardins tree.
VNI <- c("Bt92", "Bt79")
VNI_node <- getMRCA(desj_tree, VNI)
VNII <- c("C2", "C12")
VNII_node <- getMRCA(desj_tree, VNII)
VNB <- c("Bt7", "Bt34")
VNB_node <- getMRCA(desj_tree, VNB)

# Get the ages of the nodes from the original Desjardins tree. This is an
# attempt to have a calibrated tree, but the resulting branch lengths are not
# accurate.
# Depth of every tip and internal node, as returned by
# node.depth.edgelength(). Rows are matched to node numbers by position, so
# the row index (stored as `node_id` below) is the node number.
edge_lengths <- node.depth.edgelength(desj_tree)

# Trees without node labels have `node.label == NULL`, which would make the
# label vector shorter than the depth vector; pad with NA in that case.
internal_labels <- desj_tree$node.label
if (is.null(internal_labels)) {
  internal_labels <- rep(NA_character_, desj_tree$Nnode)
}
node_labels <- c(desj_tree$tip.label, internal_labels)

edge_length_mapping <- data.frame(
  node = node_labels,
  edge_length = edge_lengths,
  max_length = max(edge_lengths)
)
# Age = depth of the deepest node minus the node's own depth.
edge_length_mapping <- edge_length_mapping %>%
  mutate(age = max_length - edge_length) %>%
  rownames_to_column("node_id")

# Ages of the three clade MRCAs, named by the tip pair that defines each
# clade.
clade_ages <- edge_length_mapping %>%
  filter(node_id %in% c(VNI_node, VNII_node, VNB_node))
nodeages <- c(
  "Bt92-Bt79" = clade_ages$age[clade_ages$node_id == VNI_node],
  "C2-C12" = clade_ages$age[clade_ages$node_id == VNII_node],
  "Bt7-Bt34" = clade_ages$age[clade_ages$node_id == VNB_node]
)

# Ages of the rows whose label is a strain listed in the metadata, named by
# that label.
tip_ages <- edge_length_mapping %>%
  filter(node %in% metadata$strain)
tipages <- tip_ages$age
names(tipages) <- tip_ages$node

# Remove VNI clade from Desjardins tree to use it as backtree
# Tips returned by tips() for the VNI node are dropped from the Desjardins
# tree; the remainder is the backbone.
VNI_tips <- tips(desj_tree, VNI_node)
backtree <- drop.tip(desj_tree, VNI_tips)

# Create the reference tables
# One row: the clade spanned by "CNS_289-20427_2#4" is bound using
# "Bt7-Bt34" as the reference, with poly = FALSE.
reference <- data.frame(
  bind = c("CNS_289-20427_2#4"),
  reference = c("Bt7-Bt34"),
  poly = c(FALSE)
)

# Merge
merged <- tree.merger(
  backbone = backtree,
  data = reference,
  source.tree = ashton_tree,
  plot = FALSE,
  node.ages = nodeages,
  tip.ages = tipages
)

# Write merged tree to file
write.tree(merged, file = merged_tree_out_path)

# Get lineage and sublineage nodes for plot
# MRCA node of each lineage in the merged tree.
VNI_node <- getMRCA(merged, c("Tu241-1", "UI_31647-2"))
VNII_node <- getMRCA(merged, c("C2", "C12"))
VNBI_node <- getMRCA(merged, c("Tu229-1", "Ftc267-2"))
VNBII_node <- getMRCA(merged, c("MW-RSA3321", "MW-RSA3179"))
nodes_lineages <- data.frame(
  lineage = c("VNI", "VNII", "VNBI", "VNBII"),
  mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)

# MRCA node of each VNI sublineage in the merged tree.
VNIa4_node <- getMRCA(merged, c("04CN-30-008", "UI_31647-2"))
VNIa5_node <- getMRCA(merged, c("BMD852", "14936_1#45"))
VNIa93_node <- getMRCA(merged, c("04CN-65-080", "04CN-65-002"))
VNIa32_node <- getMRCA(merged, c("BMD942", "BMD2801"))
VNIaX_node <- getMRCA(merged, c("Bt48", "04CN-63-007"))
VNIaY_node <- getMRCA(merged, c("04CN-65-073", "Bt138"))
VNIb_node <- getMRCA(merged, c("04CN-65-096", "MW-RSA722"))
VNIc_node <- getMRCA(merged, c("Bt20", "Bt11"))
nodes_vnisublineages <- data.frame(
  sublineage = c(
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# Lineages and VNI sublineages combined in a single table.
nodes_sublineages <- data.frame(
  sublineage = c(
    "VNI", "VNII", "VNBI", "VNBII",
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNI_node, VNII_node, VNBI_node, VNBII_node,
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# Base
# Base plot of the merged tree, drawn without branch lengths
# (branch.length = "none"): sublineage clades are shaded, lineage names are
# written at their MRCA nodes, and tip points are coloured by dataset.
m <- ggtree(
  merged,
  ladderize = TRUE,
  layout = "circular",
  branch.length = "none",
  size = 0.1
) %<+% metadata +
  geom_tiplab(color = "black", size = 0.5, offset = 0.01) +
  # match() yields NA for every node that is not a lineage MRCA, so only the
  # four lineage nodes get a label.
  geom_text(
    aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
    size = 2, fontface = "bold",
    hjust = 1.25, vjust = -0.5
  ) +
  geom_hilight(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, fill = sublineage),
    alpha = 0.8
  ) +
  scale_fill_manual(name = "Sublineage", values = sublineage_shading) +
  # "none" replaces the deprecated `FALSE` for hiding a legend.
  guides(fill = "none") +
  new_scale_fill() +
  # Draw the branches again, after the highlight layer.
  geom_tree(size = 0.1) +
  geom_tippoint(aes(color = dataset), size = 0.3) +
  scale_color_manual(
    values = dataset_colors, name = "Dataset", na.translate = FALSE
  ) +
  guides(color = guide_legend(override.aes = list(size = 5), order = 1))

# Base plot plus sublineage labels.
mc <- m +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    align = TRUE, face = "bold",
    angle = "auto", offset = 3
  )

# With country, MAT, and source
# Heatmap rings around the base plot, from the inside out: country, source,
# mating type. Each ring gets its own fill scale via new_scale_fill().
m1 <- gheatmap(m, country, width = .05, colnames = FALSE, offset = 3) +
  scale_fill_manual(
    values = country_colors,
    name = "Country", na.translate = FALSE,
    limits = levels(country$country_of_origin)
  ) +
  guides(fill = guide_legend(order = 2, ncol = 2)) +
  new_scale_fill()
m2 <- gheatmap(m1, source, width = .05, colnames = FALSE, offset = 5) +
  scale_fill_manual(
    values = source_colors,
    name = "Source", na.translate = FALSE
  ) +
  guides(fill = guide_legend(order = 3)) +
  new_scale_fill()
m3 <- gheatmap(m2, mating_type, width = .05, colnames = FALSE, offset = 7) +
  scale_fill_manual(
    values = mat_colors,
    name = "Mating type", na.translate = FALSE
  ) +
  guides(fill = guide_legend(order = 4)) +
  new_scale_fill() +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    align = TRUE, face = "bold",
    angle = "auto", offset = 10
  ) +
  theme(legend.position = "bottom", legend.direction = "vertical")
m3

ggsave(merged_tree_plot, m3, height = 10, width = 10, units = "in", dpi = 600)
ggsave(merged_tree_plot_pdf, m3, height = 10, width = 10, units = "in", dpi = 600)

# Show samples removed by ploidy and quality
# Two heatmap rings around the base plot: ploidy (inner) and the quality
# warning (outer, legend title "Filtered"). NA values are drawn in gray90.
mp <- gheatmap(m, ploidy, width = .05, colnames = FALSE, offset = 3) +
  scale_fill_brewer(name = "Ploidy", palette = "Dark2", na.value = "gray90") +
  guides(fill = guide_legend(order = 4)) +
  new_scale_fill()
mq <- gheatmap(mp, quality, width = .05, colnames = FALSE, offset = 5) +
  scale_fill_brewer(name = "Filtered", palette = "Set1", na.value = "gray90") +
  guides(fill = guide_legend(order = 5)) +
  new_scale_fill() +
  geom_cladelab(
    data = nodes_vnisublineages,
    mapping = aes(node = mrca, label = sublineage),
    align = TRUE, face = "bold",
    angle = "auto", offset = 7.3
  )
mq

ggsave(merged_tree_removed_plot_png, mq, height = 10, width = 10, units = "in", dpi = 600)

# Get two samples of each non-VNI lineage, VNI sublineage, and all
# VNIa-outlier
# All samples labelled "VNIa-outlier".
VNIa_outlier <- metadata %>%
  filter(vni_subdivision == "VNIa-outlier")

# Tip pairs used to locate lineage and sublineage MRCAs in the merged tree.
# "UI_31647-2" is listed twice because it belongs to both the VNI pair and
# the VNIa-4 pair.
representatives <- c(
  "Tu241-1", "UI_31647-2", "C2", "C12", "Tu229-1",
  "Ftc267-2", "MW-RSA3321", "MW-RSA3179", "04CN-30-008",
  "UI_31647-2", "BMD852", "14936_1#45", "04CN-65-080",
  "04CN-65-002", "BMD942", "BMD2801", "Bt48",
  "04CN-63-007", "04CN-65-073", "Bt138",
  "04CN-65-096", "MW-RSA722", "Bt20", "Bt11"
)
tips <- c(VNIa_outlier$strain, representatives)

# Make a small version of the merged tree only with the tips in tips
# Drop every tip of the merged tree that is not listed in `tips`.
small_tree <- drop.tip(merged, setdiff(merged$tip.label, tips))

# MRCA node of each lineage and VNI sublineage, looked up again in the small
# tree.
VNI_node <- getMRCA(small_tree, c("Tu241-1", "UI_31647-2"))
VNII_node <- getMRCA(small_tree, c("C2", "C12"))
VNBI_node <- getMRCA(small_tree, c("Tu229-1", "Ftc267-2"))
VNBII_node <- getMRCA(small_tree, c("MW-RSA3321", "MW-RSA3179"))
VNIa4_node <- getMRCA(small_tree, c("04CN-30-008", "UI_31647-2"))
VNIa5_node <- getMRCA(small_tree, c("BMD852", "14936_1#45"))
VNIa93_node <- getMRCA(small_tree, c("04CN-65-080", "04CN-65-002"))
VNIa32_node <- getMRCA(small_tree, c("BMD942", "BMD2801"))
VNIaX_node <- getMRCA(small_tree, c("Bt48", "04CN-63-007"))
VNIaY_node <- getMRCA(small_tree, c("04CN-65-073", "Bt138"))
VNIb_node <- getMRCA(small_tree, c("04CN-65-096", "MW-RSA722"))
VNIc_node <- getMRCA(small_tree, c("Bt20", "Bt11"))
nodes_sublineages <- data.frame(
  sublineage = c(
    "VNI", "VNII", "VNBI", "VNBII",
    "VNIa-4", "VNIa-5", "VNIa-93",
    "VNIa-32", "VNIa-X", "VNIa-Y",
    "VNIb", "VNIc"
  ),
  mrca = c(
    VNI_node, VNII_node, VNBI_node, VNBII_node,
    VNIa4_node, VNIa5_node, VNIa93_node,
    VNIa32_node, VNIaX_node, VNIaY_node,
    VNIb_node, VNIc_node
  )
)

# Rectangular plot of the small tree, drawn without branch lengths
# (branch.length = "none"), with aligned tip labels and the lineage /
# sublineage names written at their MRCA nodes.
p <- ggtree(
  small_tree,
  layout = "rectangular", size = 1, branch.length = "none"
) +
  xlim(0, 11) +
  geom_tiplab(
    aes(label = label),
    size = 3, align = TRUE, linetype = "dashed",
    linesize = .05,
    offset = 0, # increase offset for more space
    align.length = 2 # adjust width of the area for tiplabs
  ) +
  # match() yields NA for every node that is not listed in nodes_sublineages,
  # so only those nodes get a label.
  geom_text(
    aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]),
    size = 3, fontface = "bold",
    hjust = 1.25, vjust = -0.5
  )
p

ggsave(merged_tree_small_plot, p, height = 6, width = 7, units = "in", dpi = 600)